; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s | FileCheck %s

; This test has multiple opportunities for SimplifyDemandedBits after type
; legalization: two on the chain feeding the LHS of the shl, and one on the
; shift amount (the RHS). We previously weren't managing the DAGCombiner
; worklist correctly and failed to simplify the RHS.
define i32 @foo(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mul a0, a0, a0
; CHECK-NEXT:    addiw a0, a0, 1
; CHECK-NEXT:    mul a0, a0, a0
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    addiw a0, a0, 1
; CHECK-NEXT:    sllw a0, a0, a1
; CHECK-NEXT:    ret
  ; Value operand of the shift: ((x*x + 1) * (x*x + 1) + z) + 1.
  %sq = mul i32 %x, %x
  %sq.inc = add i32 %sq, 1
  %sq2 = mul i32 %sq.inc, %sq.inc
  %sum = add i32 %sq2, %z
  %sum.inc = add i32 %sum, 1
  ; %y is the shift amount.
  %shifted = shl i32 %sum.inc, %y
  ret i32 %shifted
}

; The sign bit of an nsw self multiply is 0. Make sure we can use this to
; convert the AND constant to -8.
define i64 @mul_self_nsw_sign(i64 %x) {
; CHECK-LABEL: mul_self_nsw_sign:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mul a0, a0, a0
; CHECK-NEXT:    andi a0, a0, -8
; CHECK-NEXT:    ret
  %sq = mul nsw i64 %x, %x
  ; 9223372036854775800 is 0x7ffffffffffffff8, i.e. -8 with the sign bit
  ; cleared.
  %masked = and i64 %sq, 9223372036854775800
  ret i64 %masked
}

; Make sure we sign extend the constant after type legalization to allow the
; use of ori.
define void @ori(ptr nocapture noundef %0) {
; CHECK-LABEL: ori:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 0(a0)
; CHECK-NEXT:    ori a1, a1, -2
; CHECK-NEXT:    sw a1, 0(a0)
; CHECK-NEXT:    ret
  ; Read-modify-write of the i32 at %0: set every bit except bit 0.
  %val = load i32, ptr %0, align 4
  %res = or i32 %val, -2
  store i32 %res, ptr %0, align 4
  ret void
}

; Make sure we sign extend the constant after type legalization to allow the
; use of xori.
define void @xori(ptr nocapture noundef %0) {
; CHECK-LABEL: xori:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 0(a0)
; CHECK-NEXT:    xori a1, a1, -5
; CHECK-NEXT:    sw a1, 0(a0)
; CHECK-NEXT:    ret
  ; Read-modify-write of the i32 at %0 with a negative xor constant.
  %val = load i32, ptr %0, align 4
  %res = xor i32 %val, -5
  store i32 %res, ptr %0, align 4
  ret void
}

; Make sure we sign extend the constant after type legalization to allow the
; shorter constant materialization.
define void @or_signbit(ptr nocapture noundef %0) {
; CHECK-LABEL: or_signbit:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 0(a0)
; CHECK-NEXT:    lui a2, 524288
; CHECK-NEXT:    or a1, a1, a2
; CHECK-NEXT:    sw a1, 0(a0)
; CHECK-NEXT:    ret
  %val = load i32, ptr %0, align 4
  ; -2147483648 is the i32 sign bit; the expected code materializes it with a
  ; single lui.
  %res = or i32 %val, -2147483648
  store i32 %res, ptr %0, align 4
  ret void
}

; Make sure we sign extend the constant after type legalization to allow the
; shorter constant materialization.
define void @xor_signbit(ptr nocapture noundef %0) {
; CHECK-LABEL: xor_signbit:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lw a1, 0(a0)
; CHECK-NEXT:    lui a2, 524288
; CHECK-NEXT:    xor a1, a1, a2
; CHECK-NEXT:    sw a1, 0(a0)
; CHECK-NEXT:    ret
  %val = load i32, ptr %0, align 4
  ; -2147483648 is the i32 sign bit; the expected code materializes it with a
  ; single lui.
  %res = xor i32 %val, -2147483648
  store i32 %res, ptr %0, align 4
  ret void
}

; Type legalization inserts a sext_inreg after the sub. This causes the
; constant for the AND to be turned into 0xfffffff8. Then SimplifyDemandedBits
; removes the sext_inreg from the path to the store. This prevents
; TargetShrinkDemandedConstant from being able to restore the lost upper bits
; from the and mask to allow andi. ISel is able to recover the lost sext_inreg
; using hasAllWUsers. We also use hasAllWUsers to recover the ANDI.
define signext i32 @andi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: andi_sub_cse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a0, a0, -8
; CHECK-NEXT:    subw a0, a0, a1
; CHECK-NEXT:    sw a0, 0(a2)
; CHECK-NEXT:    ret
  %masked = and i32 %0, -8
  %diff = sub i32 %masked, %1
  ; The difference is both stored and returned.
  store i32 %diff, ptr %2, align 4
  ret i32 %diff
}

; Same pattern as andi_sub_cse with add in place of and. The expected code
; performs the subtraction first and applies the -8 with a trailing addiw.
define signext i32 @addi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: addi_sub_cse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subw a0, a0, a1
; CHECK-NEXT:    addiw a0, a0, -8
; CHECK-NEXT:    sw a0, 0(a2)
; CHECK-NEXT:    ret
  %biased = add i32 %0, -8
  %diff = sub i32 %biased, %1
  ; The difference is both stored and returned.
  store i32 %diff, ptr %2, align 4
  ret i32 %diff
}

; Same pattern as andi_sub_cse with xor in place of and. The expected code
; uses xori with the -8 immediate.
define signext i32 @xori_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: xori_sub_cse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xori a0, a0, -8
; CHECK-NEXT:    subw a0, a0, a1
; CHECK-NEXT:    sw a0, 0(a2)
; CHECK-NEXT:    ret
  %flipped = xor i32 %0, -8
  %diff = sub i32 %flipped, %1
  ; The difference is both stored and returned.
  store i32 %diff, ptr %2, align 4
  ret i32 %diff
}

; Same pattern as andi_sub_cse with or in place of and. The expected code
; uses ori with the -8 immediate.
define signext i32 @ori_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: ori_sub_cse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ori a0, a0, -8
; CHECK-NEXT:    subw a0, a0, a1
; CHECK-NEXT:    sw a0, 0(a2)
; CHECK-NEXT:    ret
  %set = or i32 %0, -8
  %diff = sub i32 %set, %1
  ; The difference is both stored and returned.
  store i32 %diff, ptr %2, align 4
  ret i32 %diff
}

; SimplifyDemandedBits breaks the ANDI by turning -8 into 0xfffffff8. This
; gets CSEd with the AND needed for type legalizing the lshr. This increases
; the use count of the AND with 0xfffffff8 making TargetShrinkDemandedConstant
; unable to restore it to 0xffffffff for the lshr and -8 for the AND to use
; ANDI.
; Instead we rely on ISel to form srliw even though the AND has multiple uses
; and the mask has missing 1s where bits will be shifted out. This reduces the
; use count of the AND and we can use hasAllWUsers to form ANDI.
define signext i32 @andi_srliw(i32 signext %0, ptr %1, i32 signext %2) {
; CHECK-LABEL: andi_srliw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a3, a0, -8
; CHECK-NEXT:    srliw a4, a0, 3
; CHECK-NEXT:    addw a0, a3, a2
; CHECK-NEXT:    sw a4, 0(a1)
; CHECK-NEXT:    ret
  ; Both the mask and the logical shift read the same input %0.
  %masked = and i32 %0, -8
  %shifted = lshr i32 %0, 3
  ; The shifted value goes to memory; the masked value feeds the returned sum.
  store i32 %shifted, ptr %1, align 4
  %sum = add i32 %masked, %2
  ret i32 %sum
}

; Computes (x & 0xff00) | 0xff. The expected code is an ori with 255 followed
; by an slli/srli pair by 48, which clears everything above the low 16 bits.
define i32 @and_or(i32 signext %x) {
; CHECK-LABEL: and_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    ori a0, a0, 255
; CHECK-NEXT:    slli a0, a0, 48
; CHECK-NEXT:    srli a0, a0, 48
; CHECK-NEXT:    ret
entry:
  ; 65280 is 0xff00.
  %hi.byte = and i32 %x, 65280
  %res = or i32 %hi.byte, 255
  ret i32 %res
}
